#include "stdafx.h"
/*
 * -- SuperLU routine (version 2.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * November 15, 1997
 *
 */

#include "hnum_pdsp_defs.h"
#include "hnum_colamd.h"
#include "hnum_cblas.h"


namespace harlinn
{
    namespace numerics
    {
        namespace SuperLU
        {
            void get_colamd(
	               const int m,  /* number of rows in matrix A. */
	               const int n,  /* number of columns in matrix A. */
	               const int nnz,/* number of nonzeros in matrix A. */
	               int *colptr,  /* column pointer of size n+1 for matrix A. */
	               int *rowind,  /* row indices of size nz for matrix A. */
	               int *perm_c   /* out - the column permutation vector. */
	               )
            {
                int Alen, *A, i, info, *p;
                double *knobs;

                Alen = colamd_recommended(nnz, m, n);

                if ( !(knobs = (double *) SUPERLU_MALLOC(COLAMD_KNOBS * sizeof(double))) )
                    SUPERLU_ABORT("Malloc fails for knobs");
                colamd_set_defaults(knobs);

                if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) )
                    SUPERLU_ABORT("Malloc fails for A[]");
                if (!(p = (int *) SUPERLU_MALLOC((n+1) * sizeof(int))) )
                    SUPERLU_ABORT("Malloc fails for p[]");
                for (i = 0; i <= n; ++i) p[i] = colptr[i];
                for (i = 0; i < nnz; ++i) A[i] = rowind[i];
                info = colamd(m, n, Alen, A, p, knobs);
                if ( info == FALSE ) SUPERLU_ABORT("COLAMD failed");

                for (i = 0; i < n; ++i) perm_c[p[i]] = i;

                SUPERLU_FREE(knobs);
                SUPERLU_FREE(A);
                SUPERLU_FREE(p);
            }

            void
            getata(
                   const int m,      /* number of rows in matrix A. */
                   const int n,      /* number of columns in matrix A. */
                   const int nz,     /* number of nonzeros in matrix A */
                   int *colptr,      /* column pointer of size n+1 for matrix A. */
                   int *rowind,      /* row indices of size nz for matrix A. */
                   int *atanz,       /* out - on exit, returns the actual number of
                                        nonzeros in matrix A'*A. */
                   int **ata_colptr, /* out - size n+1 */
                   int **ata_rowind  /* out - size *atanz */
                   )
            /*
             * Purpose
             * =======
             *
             * Form the structure of A'*A. A is an m-by-n matrix in column oriented
             * format represented by (colptr, rowind). The output A'*A is in column
             * oriented format (symmetrically, also row oriented), represented by
             * (ata_colptr, ata_rowind).
             *
             * This routine is modified from GETATA routine by Tim Davis.
             * The complexity of this algorithm is: SUM_{i=1,m} r(i)^2,
             * i.e., the sum of the square of the row counts.
             *
             * Questions
             * =========
             *     o  Do I need to withhold the *dense* rows?
             *     o  How do I know the number of nonzeros in A'*A?
             * 
             */
            {
                register int i, j, k, col, num_nz, ti, trow;
                int *marker, *b_colptr, *b_rowind;
                int *t_colptr, *t_rowind; /* a column oriented form of T = A' */

                if (!(marker = (int*)SUPERLU_MALLOC( (SUPERLU_MAX(m,n)+1) * sizeof(int)) ))
	            SUPERLU_ABORT("SUPERLU_MALLOC fails for marker[]");
                if ( !(t_colptr = (int*) SUPERLU_MALLOC( (m+1) * sizeof(int)) ) )
	            SUPERLU_ABORT("SUPERLU_MALLOC t_colptr[]");
                if ( !(t_rowind = (int*) SUPERLU_MALLOC( nz * sizeof(int)) ) )
	            SUPERLU_ABORT("SUPERLU_MALLOC fails for t_rowind[]");

    
                /* Get counts of each column of T, and set up column pointers */
                for (i = 0; i < m; ++i) marker[i] = 0;
                for (j = 0; j < n; ++j) {
	            for (i = colptr[j]; i < colptr[j+1]; ++i)
	                ++marker[rowind[i]];
                }
                t_colptr[0] = 0;
                for (i = 0; i < m; ++i) {
	            t_colptr[i+1] = t_colptr[i] + marker[i];
	            marker[i] = t_colptr[i];
                }

                /* Transpose the matrix from A to T */
                for (j = 0; j < n; ++j)
	            for (i = colptr[j]; i < colptr[j+1]; ++i) {
	                col = rowind[i];
	                t_rowind[marker[col]] = j;
	                ++marker[col];
	            }

    
                /* ----------------------------------------------------------------
                   compute B = T * A, where column j of B is:

                   Struct (B_*j) =    UNION   ( Struct (T_*k) )
                                    A_kj != 0

                   do not include the diagonal entry
   
                   ( Partition A as: A = (A_*1, ..., A_*n)
                     Then B = T * A = (T * A_*1, ..., T * A_*n), where
                     T * A_*j = (T_*1, ..., T_*m) * A_*j.  )
                   ---------------------------------------------------------------- */

                /* Zero the diagonal flag */
                for (i = 0; i < n; ++i) marker[i] = -1;

                /* First pass determines number of nonzeros in B */
                num_nz = 0;
                for (j = 0; j < n; ++j) {
	            /* Flag the diagonal so it's not included in the B matrix */
	            marker[j] = j;

	            for (i = colptr[j]; i < colptr[j+1]; ++i) {
	                /* A_kj is nonzero, add pattern of column T_*k to B_*j */
	                k = rowind[i];
	                for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
		            trow = t_rowind[ti];
		            if ( marker[trow] != j ) {
		                marker[trow] = j;
		                num_nz++;
		            }
	                }
	            }
                }
                *atanz = num_nz;
    
                /* Allocate storage for A'*A */
                if ( !(*ata_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
	            SUPERLU_ABORT("SUPERLU_MALLOC fails for ata_colptr[]");
                if ( *atanz ) {
	            if ( !(*ata_rowind = (int*) SUPERLU_MALLOC( *atanz * sizeof(int)) ) )
	                SUPERLU_ABORT("SUPERLU_MALLOC fails for ata_rowind[]");
                }
                b_colptr = *ata_colptr; /* aliasing */
                b_rowind = *ata_rowind;
    
                /* Zero the diagonal flag */
                for (i = 0; i < n; ++i) marker[i] = -1;
    
                /* Compute each column of B, one at a time */
                num_nz = 0;
                for (j = 0; j < n; ++j) {
	            b_colptr[j] = num_nz;
	
	            /* Flag the diagonal so it's not included in the B matrix */
	            marker[j] = j;

	            for (i = colptr[j]; i < colptr[j+1]; ++i) {
	                /* A_kj is nonzero, add pattern of column T_*k to B_*j */
	                k = rowind[i];
	                for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
		            trow = t_rowind[ti];
		            if ( marker[trow] != j ) {
		                marker[trow] = j;
		                b_rowind[num_nz++] = trow;
		            }
	                }
	            }
                }
                b_colptr[n] = num_nz;
       
                SUPERLU_FREE(marker);
                SUPERLU_FREE(t_colptr);
                SUPERLU_FREE(t_rowind);
            }


            void
            at_plus_a(
	              const int n,      /* number of columns in matrix A. */
	              const int nz,     /* number of nonzeros in matrix A */
	              int *colptr,      /* column pointer of size n+1 for matrix A. */
	              int *rowind,      /* row indices of size nz for matrix A. */
	              int *bnz,         /* out - on exit, returns the actual number of
                                           nonzeros in matrix A'*A. */
	              int **b_colptr,   /* out - size n+1 */
	              int **b_rowind    /* out - size *bnz */
	              )
            {
            /*
             * Purpose
             * =======
             *
             * Form the structure of A'+A. A is an n-by-n matrix in column oriented
             * format represented by (colptr, rowind). The output A'+A is in column
             * oriented format (symmetrically, also row oriented), represented by
             * (b_colptr, b_rowind).
             *
             */
                register int i, j, k, col, num_nz;
                int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
                int *marker;

                if ( !(marker = (int*) SUPERLU_MALLOC( n * sizeof(int)) ) )
	            SUPERLU_ABORT("SUPERLU_MALLOC fails for marker[]");
                if ( !(t_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
	            SUPERLU_ABORT("SUPERLU_MALLOC fails for t_colptr[]");
                if ( !(t_rowind = (int*) SUPERLU_MALLOC( nz * sizeof(int)) ) )
	            SUPERLU_ABORT("SUPERLU_MALLOC fails t_rowind[]");

    
                /* Get counts of each column of T, and set up column pointers */
                for (i = 0; i < n; ++i) marker[i] = 0;
                for (j = 0; j < n; ++j) {
	            for (i = colptr[j]; i < colptr[j+1]; ++i)
	                ++marker[rowind[i]];
                }
                t_colptr[0] = 0;
                for (i = 0; i < n; ++i) {
	            t_colptr[i+1] = t_colptr[i] + marker[i];
	            marker[i] = t_colptr[i];
                }

                /* Transpose the matrix from A to T */
                for (j = 0; j < n; ++j)
	            for (i = colptr[j]; i < colptr[j+1]; ++i) {
	                col = rowind[i];
	                t_rowind[marker[col]] = j;
	                ++marker[col];
	            }


                /* ----------------------------------------------------------------
                   compute B = A + T, where column j of B is:

                   Struct (B_*j) = Struct (A_*k) UNION Struct (T_*k)

                   do not include the diagonal entry
                   ---------------------------------------------------------------- */

                /* Zero the diagonal flag */
                for (i = 0; i < n; ++i) marker[i] = -1;

                /* First pass determines number of nonzeros in B */
                num_nz = 0;
                for (j = 0; j < n; ++j) {
	            /* Flag the diagonal so it's not included in the B matrix */
	            marker[j] = j;

	            /* Add pattern of column A_*k to B_*j */
	            for (i = colptr[j]; i < colptr[j+1]; ++i) {
	                k = rowind[i];
	                if ( marker[k] != j ) {
		            marker[k] = j;
		            ++num_nz;
	                }
	            }

	            /* Add pattern of column T_*k to B_*j */
	            for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
	                k = t_rowind[i];
	                if ( marker[k] != j ) {
		            marker[k] = j;
		            ++num_nz;
	                }
	            }
                }
                *bnz = num_nz;
    
                /* Allocate storage for A+A' */
                if ( !(*b_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
	            SUPERLU_ABORT("SUPERLU_MALLOC fails for b_colptr[]");
                if ( *bnz) {
                  if ( !(*b_rowind = (int*) SUPERLU_MALLOC( *bnz * sizeof(int)) ) )
	            SUPERLU_ABORT("SUPERLU_MALLOC fails for b_rowind[]");
                }
    
                /* Zero the diagonal flag */
                for (i = 0; i < n; ++i) marker[i] = -1;
    
                /* Compute each column of B, one at a time */
                num_nz = 0;
                for (j = 0; j < n; ++j) {
	            (*b_colptr)[j] = num_nz;
	
	            /* Flag the diagonal so it's not included in the B matrix */
	            marker[j] = j;

	            /* Add pattern of column A_*k to B_*j */
	            for (i = colptr[j]; i < colptr[j+1]; ++i) {
	                k = rowind[i];
	                if ( marker[k] != j ) {
		            marker[k] = j;
		            (*b_rowind)[num_nz++] = k;
	                }
	            }

	            /* Add pattern of column T_*k to B_*j */
	            for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
	                k = t_rowind[i];
	                if ( marker[k] != j ) {
		            marker[k] = j;
		            (*b_rowind)[num_nz++] = k;
	                }
	            }
                }
                (*b_colptr)[n] = num_nz;
       
                SUPERLU_FREE(marker);
                SUPERLU_FREE(t_colptr);
                SUPERLU_FREE(t_rowind);
            }

            void
            get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
            /*
             * Purpose
             * =======
             *
             * GET_PERM_C obtains a permutation matrix Pc, by applying the multiple
             * minimum degree ordering code by Joseph Liu to matrix A'*A or A+A'.
             * or using approximate minimum degree column ordering by Davis et. al.
             * The LU factorization of A*Pc tends to have less fill than the LU 
             * factorization of A.
             *
             * Arguments
             * =========
             *
             * ispec   (input) int
             *         Specifies the type of column ordering to reduce fill:
             *         = 1: minimum degree on the structure of A^T * A
             *         = 2: minimum degree on the structure of A^T + A
             *         = 3: approximate minimum degree for unsymmetric matrices
             *         If ispec == 0, the natural ordering (i.e., Pc = I) is returned.
             * 
             * A       (input) SuperMatrix*
             *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
             *         of the linear equations is A->nrow. Currently, the type of A 
             *         can be: Stype = NC; Dtype = _D; Mtype = GE. In the future,
             *         more general A can be handled.
             *
             * perm_c  (output) int*
             *	   Column permutation vector of size A->ncol, which defines the 
             *         permutation matrix Pc; perm_c[i] = j means column i of A is 
             *         in position j in A*Pc.
             *
             */
            {
                NCformat *Astore = (NCformat *)A->Store;
                int m, n, bnz, *b_colptr, i;
                int delta, maxint, nofsub, *invp;
                int *b_rowind, *dhead, *qsize, *llist, *marker;
                double t, SuperLU_timer_();
    
                m = A->nrow;
                n = A->ncol;

                t = SuperLU_timer_();
                switch ( ispec ) {
                    case 0: /* Natural ordering */
	                  for (i = 0; i < n; ++i) perm_c[i] = i;
	                  printf("Use natural column ordering.\n");
	                  return;
                    case 1: /* Minimum degree ordering on A'*A */
	                  getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
		                 &bnz, &b_colptr, &b_rowind);
	                  printf("Use minimum degree ordering on A'*A.\n");
	                  t = SuperLU_timer_() - t;
	                  /*printf("Form A'*A time = %8.3f\n", t);*/
	                  break;
                    case 2: /* Minimum degree ordering on A'+A */
	                  if ( m != n ) SUPERLU_ABORT("Matrix is not square");
	                  at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
			            &bnz, &b_colptr, &b_rowind);
	                  printf("Use minimum degree ordering on A'+A.\n");
	                  t = SuperLU_timer_() - t;
	                  /*printf("Form A'+A time = %8.3f\n", t);*/
	                  break;
                    case 3: /* Approximate minimum degree column ordering. */
	                  get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
			             perm_c);
	                  printf(".. Use approximate minimum degree column ordering.\n");
	                  return; 
                    default:
	                  SUPERLU_ABORT("Invalid ISPEC");
                }

                if ( bnz != 0 ) {
	            t = SuperLU_timer_();

	            /* Initialize and allocate storage for GENMMD. */
	            delta = 0; /* DELTA is a parameter to allow the choice of nodes
		                  whose degree <= min-degree + DELTA. */
	            maxint = 2147483647; /* 2**31 - 1 */
	            invp = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
	            if ( !invp ) SUPERLU_ABORT("SUPERLU_MALLOC fails for invp.");
	            dhead = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
	            if ( !dhead ) SUPERLU_ABORT("SUPERLU_MALLOC fails for dhead.");
	            qsize = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
	            if ( !qsize ) SUPERLU_ABORT("SUPERLU_MALLOC fails for qsize.");
	            llist = (int *) SUPERLU_MALLOC(n*sizeof(int));
	            if ( !llist ) SUPERLU_ABORT("SUPERLU_MALLOC fails for llist.");
	            marker = (int *) SUPERLU_MALLOC(n*sizeof(int));
	            if ( !marker ) SUPERLU_ABORT("SUPERLU_MALLOC fails for marker.");

	            /* Transform adjacency list into 1-based indexing required by GENMMD.*/
	            for (i = 0; i <= n; ++i) ++b_colptr[i];
	            for (i = 0; i < bnz; ++i) ++b_rowind[i];
	
	            genmmd_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead, 
		            qsize, llist, marker, &maxint, &nofsub);

	            /* Transform perm_c into 0-based indexing. */
	            for (i = 0; i < n; ++i) --perm_c[i];

	            SUPERLU_FREE(b_colptr);
	            SUPERLU_FREE(b_rowind);
	            SUPERLU_FREE(invp);
	            SUPERLU_FREE(dhead);
	            SUPERLU_FREE(qsize);
	            SUPERLU_FREE(llist);
	            SUPERLU_FREE(marker);

	            t = SuperLU_timer_() - t;
	            /*  printf("call GENMMD time = %8.3f\n", t);*/

                } else { /* Empty adjacency structure */
	            for (i = 0; i < n; ++i) perm_c[i] = i;
                }

            }
        };
    };
};